Question 1
# Import package
import pandas as pd
import plotly.express as px
import numpy as np
import plotly.graph_objects as go
# Kirby21 summary table; `dat1` is not referenced in the visible part of this script.
dat1 = pd.read_csv(
    "https://raw.githubusercontent.com/bcaffo/ds4bme_intro/master/data/kirby21.csv"
)

## Load the ROI hierarchy lookup table (tab separated).
url = "https://raw.githubusercontent.com/bcaffo/MRIcloudT1volumetrics/master/inst/extdata/multilevel_lookup_table.txt"
# Drop the unused 'Level5' column, give the "modify*" columns readable names,
# and keep only the five hierarchy columns, ordered from finest (roi) to
# coarsest (level1).
multilevel_lookup = (
    pd.read_csv(url, sep="\t")
    .drop(columns=["Level5"])
    .rename(
        columns={
            "modify": "roi",
            "modify.1": "level4",
            "modify.2": "level3",
            "modify.3": "level2",
            "modify.4": "level1",
        }
    )
    .loc[:, ["roi", "level4", "level3", "level2", "level1"]]
)
multilevel_lookup.head()
| | roi | level4 | level3 | level2 | level1 |
|---|---|---|---|---|---|
| 0 | SFG_L | SFG_L | Frontal_L | CerebralCortex_L | Telencephalon_L |
| 1 | SFG_R | SFG_R | Frontal_R | CerebralCortex_R | Telencephalon_R |
| 2 | SFG_PFC_L | SFG_L | Frontal_L | CerebralCortex_L | Telencephalon_L |
| 3 | SFG_PFC_R | SFG_R | Frontal_R | CerebralCortex_R | Telencephalon_R |
| 4 | SFG_pole_L | SFG_L | Frontal_L | CerebralCortex_L | Telencephalon_L |
## Load the all-levels volume table and keep one subject's rows
# NOTE: `id` shadows the builtin of the same name; the name is kept so that any
# other code reading it keeps working.
id = 127
subjectData = pd.read_csv(
    "https://raw.githubusercontent.com/smart-stats/ds4bio_book/main/book/assetts/kirby21AllLevels.csv"
)
# Keep only rows with type == 1 and level == 5 for the chosen subject.
subjectData = subjectData.loc[
    lambda d: (d["type"] == 1) & (d["level"] == 5) & (d["id"] == id)
]
subjectData = subjectData.loc[:, ["roi", "volume"]]

## Attach the hierarchy columns to each ROI, then add:
##   icv  - constant root label shared by every row
##   comp - the ROI volume as a fraction of the summed volume
subjectData = (
    subjectData
    .merge(multilevel_lookup, on="roi")
    .assign(icv="ICV")
    .assign(comp=lambda d: d["volume"] / d["volume"].sum())
)
subjectData.head()
| | roi | volume | level4 | level3 | level2 | level1 | icv | comp |
|---|---|---|---|---|---|---|---|---|
| 0 | SFG_L | 12926 | SFG_L | Frontal_L | CerebralCortex_L | Telencephalon_L | ICV | 0.009350 |
| 1 | SFG_R | 10050 | SFG_R | Frontal_R | CerebralCortex_R | Telencephalon_R | ICV | 0.007270 |
| 2 | SFG_PFC_L | 12783 | SFG_L | Frontal_L | CerebralCortex_L | Telencephalon_L | ICV | 0.009247 |
| 3 | SFG_PFC_R | 11507 | SFG_R | Frontal_R | CerebralCortex_R | Telencephalon_R | ICV | 0.008324 |
| 4 | SFG_pole_L | 3078 | SFG_L | Frontal_L | CerebralCortex_L | Telencephalon_L | ICV | 0.002227 |
# Build the node and link lists for a Sankey diagram of the hierarchy
# ICV -> level1 -> level2 -> level3. Every aggregated row below becomes one
# node plus one link entering that node from its parent.
#
# Results consumed further down:
#   c              - source (parent) node index of each link
#   combined_value - composition carried by each link
#   combined_label - node labels; node 0 is the 'ICV' root

# level1 totals; every level1 node hangs directly off the ICV root
group1 = subjectData.groupby(['icv', 'level1']).agg({'comp': 'sum', 'level1': 'first'})

# level2 totals within each level1; 'levelone' records the parent label
group2 = subjectData.groupby(['level1', 'level2']).agg({'comp': 'sum', 'level2': 'first', 'level1': 'first'})
group2 = group2.rename(columns={'level1': 'levelone'})
# Number of level2 children per level1 parent (summary only, not used for indexing)
group2_count = group2.groupby('levelone')['level2'].agg(['count', 'first']).reset_index()

# level3 totals within each level2; 'leveltwo' records the parent label
group3 = subjectData.groupby(['level2', 'level3']).agg({'comp': 'sum', 'level3': 'first', 'level2': 'first'})
group3 = group3.rename(columns={'level2': 'leveltwo'})
# Number of level3 children per level2 parent (summary only, not used for indexing)
group3_count = group3.groupby('leveltwo')['level3'].agg(['count', 'first']).reset_index()

# Node order: the root, then the rows of group1, group2 and group3 in table order.
level1_labels = group1['level1'].tolist()
level2_labels = group2['level2'].tolist()
level3_labels = group3['level3'].tolist()
combined_label = ['ICV'] + level1_labels + level2_labels + level3_labels

# One link per non-root node, in the same order as the nodes themselves.
combined_value = (
    group1['comp'].tolist()
    + group2['comp'].tolist()
    + group3['comp'].tolist()
)

# Resolve each link's parent by label instead of by position. group2 is sorted
# by (level1, level2), so the level2 nodes are generally NOT in plain
# alphabetical level2 order; counting children per alphabetically sorted parent
# would attach level3 nodes to the wrong level2 node. Separate maps per level
# are used because the same name may occur at more than one level.
level1_node = {name: 1 + pos for pos, name in enumerate(level1_labels)}
level2_node = {
    name: 1 + len(level1_labels) + pos for pos, name in enumerate(level2_labels)
}
c = (
    [0] * len(level1_labels)  # level1 nodes are fed by the ICV root (node 0)
    + [level1_node[parent] for parent in group2['levelone']]
    + [level2_node[parent] for parent in group3['leveltwo']]
)
# Assemble and display the Sankey figure.
# Link k runs from node c[k] to node k + 1, i.e. every node after the first
# receives exactly one incoming link, in label order.
source = c
target = [k + 1 for k in range(len(c))]
value, label = combined_value, combined_label
link = {"source": source, "target": target, "value": value}
node = {"label": label}
data = go.Sankey(node=node, link=link)
fig = go.Figure(data=data)
fig